- Eksploratorna analiza (identificiranje hipotez)
- relativna primerjava
- identificiranje vzročnosti, mehanizma vpliva, razlage
- opazovanje več kot dveh spremenljivk
- Bolj jasna in prepričljiva predstavitev podatkov
ggplot2
Gradniki ggplot2:
- podatki (data.frame)
- aes - estetske preslikave v barvo, obliko in velikost
- geoms - geometrijski objekti (točke, črte, liki)
- facets - izrisi pogojno na vrednosti faktorjev
- stats - statistične transformacije (delitev v koše, kvantili, glajenje)
- scales - lestvice
require(ggplot2)
require(dplyr)
# Preview the first rows of the Orange data set.
head(Orange)
## Tree age circumference
## 1 1 118 30
## 2 1 484 58
## 3 1 664 87
## 4 1 1004 115
## 5 1 1231 120
## 6 1 1372 142

# Scatter plot of circumference against age for all trees.
# Columns are referenced by bare name inside aes(): ggplot2 looks them up in
# `data`, so writing `Orange$age` is redundant, puts "Orange$age" on the axis
# label, and stops working correctly once the plot is faceted or reused with
# another data frame.
ggplot(data = Orange, aes(x = age, y = circumference)) +
  geom_point()
# Growth of a single tree (rows where Tree == 1), drawn as points.
Orange %>%
  filter(Tree == 1) %>%
  ggplot(aes(x = age, y = circumference)) +
  geom_point()

# The same rows again, this time connected with a line.
Orange %>%
  filter(Tree == 1) %>%
  ggplot(aes(x = age, y = circumference)) +
  geom_line()

# Distribution of circumference per tree as box plots.
ggplot(Orange, aes(Tree, circumference)) +
  geom_boxplot()

# Box plots with the individual observations layered on top.
ggplot(Orange, aes(Tree, circumference)) +
  geom_boxplot() +
  geom_point()
# Histogram of circumference with the default binning.
ggplot(Orange, aes(circumference)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram with an explicit bin width of 50.
ggplot(Orange, aes(circumference)) +
  geom_histogram(binwidth = 50)

# Same histogram with a red vertical line at the median circumference.
ggplot(Orange, aes(circumference)) +
  geom_histogram(binwidth = 50) +
  geom_vline(
    xintercept = median(Orange$circumference),
    col = "red"
  )

# Kernel density estimate of circumference.
ggplot(Orange, aes(circumference)) +
  geom_density()
# Age against circumference, one panel per tree, panels laid out in one row.
ggplot(data = Orange, aes(x = circumference, y = age)) +
  geom_point() +
  facet_grid(~Tree)

# Put the tree levels into sorted order so the panels appear as 1, 2, 3, ...
# The factor is rebuilt with factor(..., levels = ...), which reorders the
# levels while every observation keeps its own tree label. Assigning to
# levels(Orange$Tree) directly would instead RENAME the levels in place and
# silently attribute the measurements to the wrong trees.
# NOTE(review): in the datasets package Tree is documented as an ordered
# factor whose levels are not in numeric order - confirm for your R version.
Orange$Tree <- factor(Orange$Tree, levels = sort(levels(Orange$Tree)))

# Points plus a red line per tree, panels wrapped into two columns.
ggplot(data = Orange, aes(x = circumference, y = age)) +
  geom_point() +
  geom_line(col = "red") +
  facet_wrap(~Tree, ncol = 2)

# All trees in a single panel, one coloured line per tree.
ggplot(data = Orange, aes(x = circumference, y = age, col = Tree)) +
  geom_line()
# First rows of the iris data set.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

# Petal length histogram; bars are filled by species.
ggplot(iris, aes(Petal.Length, fill = Species)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Mapping `color` instead of `fill` colours only the bar outlines by species.
ggplot(iris, aes(Petal.Length, color = Species)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Fill by species, with a constant black outline on every bar.
ggplot(iris, aes(Petal.Length, fill = Species)) +
  geom_histogram(color = "black")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
require(ggplot2movies)
## Loading required package: ggplot2movies
# First rows of the movies data.
head(movies)
## # A tibble: 6 x 24
## title year length budget rating votes r1 r2 r3 r4 r5
## <chr> <int> <int> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 $ 1971 121 NA 6.4 348 4.5 4.5 4.5 4.5 14.5
## 2 $100… 1939 71 NA 6 20 0 14.5 4.5 24.5 14.5
## 3 $21 … 1941 7 NA 8.2 5 0 0 0 0 0
## 4 $40,… 1996 70 NA 8.2 6 14.5 0 0 0 0
## 5 $50,… 1975 71 NA 3.4 17 24.5 4.5 0 14.5 14.5
## 6 $pent 2000 91 NA 4.3 45 4.5 4.5 4.5 14.5 14.5
## # ... with 13 more variables: r6 <dbl>, r7 <dbl>, r8 <dbl>, r9 <dbl>,
## # r10 <dbl>, mpaa <chr>, Action <int>, Animation <int>, Comedy <int>,
## # Drama <int>, Documentary <int>, Romance <int>, Short <int>

# Genre indicator columns. These are columns 18 to 23 of `movies` in the
# listing above; they are named explicitly so the selection does not depend
# on column positions.
tipi <- c("Action", "Animation", "Comedy", "Drama", "Documentary", "Romance")

# Build one data frame per genre: keep the rows where that genre's indicator
# equals 1, keep budget / Short / year, and record the genre name in `Type`.
# A movie flagged for several genres therefore appears once per genre.
seznam <- vector("list", length(tipi))
for (i in seq_along(tipi)) {
  tip <- tipi[[i]]
  seznam[[i]] <- movies %>%
    # `.data[[tip]]` looks the column up by its string name; it replaces the
    # deprecated filter_() call that evaluated a pasted string expression.
    filter(.data[[tip]] == 1) %>%
    select(Budget = budget, Short, Year = year) %>%
    mutate(Type = tip)
}

# Stack the per-genre pieces into one long data frame.
myMovies <- bind_rows(seznam)
# Number of rows per genre, each bar filled with its own colour.
ggplot(myMovies, aes(Type, fill = Type)) +
  geom_bar()

# Bars split by the Short flag (converted to a factor for a discrete fill).
ggplot(myMovies, aes(Type, fill = factor(Short))) +
  geom_bar()

# The same split with the stacking position spelled out.
ggplot(myMovies, aes(Type, fill = factor(Short))) +
  geom_bar(position = "stack")

# The two Short groups placed side by side instead of stacked.
ggplot(myMovies, aes(Type, fill = factor(Short))) +
  geom_bar(position = "dodge")
# Budget per genre as box plots on the raw scale.
ggplot(myMovies, aes(Type, Budget)) +
  geom_boxplot()

# The same box plots on a log10 y axis.
ggplot(myMovies, aes(Type, Budget)) +
  geom_boxplot() +
  scale_y_log10()

# Log-scale box plots with the raw observations drawn on top.
ggplot(myMovies, aes(Type, Budget)) +
  geom_boxplot() +
  scale_y_log10() +
  geom_point()

# Jittered observations underneath semi-transparent box plots.
ggplot(myMovies, aes(Type, Budget)) +
  geom_jitter() +
  geom_boxplot(alpha = I(0.6)) +
  scale_y_log10()

# The same jitter + box plot figure, drawn a second time.
ggplot(myMovies, aes(Type, Budget)) +
  geom_jitter() +
  geom_boxplot(alpha = I(0.6)) +
  scale_y_log10()

# Year against genre, with point size mapped to budget.
ggplot(myMovies, aes(Year, Type, size = Budget)) +
  geom_point()
# First rows of the ToothGrowth data set.
head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5

# Length against dose.
ggplot(ToothGrowth, aes(dose, len)) +
  geom_point()

# Same scatter plot, coloured by supplement type.
ggplot(ToothGrowth, aes(dose, len, col = supp)) +
  geom_point()

# One panel per supplement type, side by side.
ggplot(ToothGrowth, aes(dose, len, col = supp)) +
  geom_point() +
  facet_grid(. ~ supp)

# Panels with the default smoother added.
ggplot(ToothGrowth, aes(dose, len, col = supp)) +
  geom_point() +
  facet_grid(. ~ supp) +
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Panels with a linear-model fit instead of the default smoother.
ggplot(ToothGrowth, aes(dose, len, col = supp)) +
  geom_point() +
  facet_grid(. ~ supp) +
  stat_smooth(method = "lm")
# First rows of the economics data set.
head(economics)
## # A tibble: 6 x 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <int> <dbl> <dbl> <int>
## 1 1967-07-01 507. 198712 12.5 4.5 2944
## 2 1967-08-01 510. 198911 12.5 4.7 2945
## 3 1967-09-01 516. 199113 11.7 4.6 2958
## 4 1967-10-01 513. 199311 12.5 4.9 3143
## 5 1967-11-01 518. 199498 12.5 4.7 3066
## 6 1967-12-01 526. 199657 12.1 4.8 3018

# Time series of the `unemploy` column over `date`, drawn as a line.
ggplot(economics, aes(date, unemploy)) +
  geom_line()
# Round the year to three significant digits (e.g. 1971 -> 1970,
# 1996 -> 2000) so it can serve as a coarse facetting variable.
myMovies$RoundYear <- signif(myMovies$Year, digits = 3)

# Budget histograms on a log10 x axis.
# The parameter is spelled `binwidth`. The previous spelling `binwith` is not
# a known parameter, so it was ignored and every plot fell back to the default
# with the message "`stat_bin()` using `bins = 30`. Pick better value with
# `binwidth`.". Scale transformations are applied before the statistic, so
# `binwidth = 1` is measured in log10 units: one order of magnitude per bar.

# One panel per genre, arranged in columns.
ggplot(data = myMovies, aes(x = Budget)) +
  geom_histogram(binwidth = 1) +
  facet_grid(. ~ Type) +
  scale_x_log10()

# One panel per genre, arranged in rows.
ggplot(data = myMovies, aes(x = Budget)) +
  geom_histogram(binwidth = 1) +
  facet_grid(Type ~ .) +
  scale_x_log10()

# Grid of panels: rounded year in rows, genre in columns.
ggplot(data = myMovies, aes(x = Budget)) +
  geom_histogram(binwidth = 1) +
  facet_grid(RoundYear ~ Type) +
  scale_x_log10()

# Only rows with RoundYear > 1980; one column per genre/year combination.
ggplot(data = subset(myMovies, RoundYear > 1980), aes(x = Budget)) +
  geom_histogram(binwidth = 1) +
  facet_grid(. ~ Type + RoundYear) +
  scale_x_log10()